{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2-3、自动微分机制"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "神经网络通常依赖反向传播求梯度来更新网络参数，求梯度过程通常是一件非常复杂而容易出错的事情。\n",
    "\n",
    "而深度学习框架可以帮助我们自动地完成这种求梯度运算。\n",
    "\n",
    "Tensorflow一般使用梯度磁带tf.GradientTape来记录正向运算过程，然后反播磁带自动得到梯度值。\n",
    "\n",
    "这种利用tf.GradientTape求微分的方法叫做Tensorflow的自动微分机制。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 一，利用梯度磁带求导数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor(-2.0, shape=(), dtype=float32)\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np \n",
    "\n",
    "# f(x) = a*x**2 + b*x + c的导数\n",
    "\n",
    "x = tf.Variable(0.0, name=\"x\", dtype=tf.float32)\n",
    "a = tf.constant(1.0)\n",
    "b = tf.constant(-2.0)\n",
    "c = tf.constant(1.0)\n",
    "\n",
    "with tf.GradientTape() as tape:\n",
    "    y = a*tf.pow(x, 2) + b*x + c\n",
    "\n",
    "dy_dx = tape.gradient(y, x)\n",
    "print(dy_dx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor(0.0, shape=(), dtype=float32)\n",
      "tf.Tensor(1.0, shape=(), dtype=float32)\n"
     ]
    }
   ],
   "source": [
    "# 对常量、张量也可以求导，需要加上 watch\n",
    "with tf.GradientTape() as tape:\n",
    "    tape.watch([a, b, c])\n",
    "    y = a* tf.pow(x, 2) + b*x + c\n",
    "dy_dx, dy_da, dy_db, dy_dc = tape.gradient(y, [x, a, b, c])\n",
    "print(dy_da)\n",
    "print(dy_dc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor(2.0, shape=(), dtype=float32)\n"
     ]
    }
   ],
   "source": [
    "# 可以求二阶导数\n",
    "with tf.GradientTape() as tape2:\n",
    "    with tf.GradientTape() as tape1:\n",
    "        y = a*tf.pow(x, 2) + b*x + c\n",
    "    dy_dx = tape1.gradient(y, x)\n",
    "dy2_dx2 = tape2.gradient(dy_dx, x)\n",
    "\n",
    "print(dy2_dx2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(-2, 1)\n",
      "(0, 0)\n"
     ]
    }
   ],
   "source": [
    "# 可以在autograph中使用\n",
    "@tf.function\n",
    "def f(x):\n",
    "    a = tf.constant(1.0)\n",
    "    b = tf.constant(-2.0)\n",
    "    c = tf.constant(1.0)\n",
    "    \n",
    "    # 自变量转换 tf.float32\n",
    "    x = tf.cast(x, tf.float32)\n",
    "    with tf.GradientTape() as tape:\n",
    "        tape.watch(x)\n",
    "        y = a*tf.pow(x, 2) + b*x + c\n",
    "    dy_dx = tape.gradient(y, x)\n",
    "    \n",
    "    return ((dy_dx, y))\n",
    "\n",
    "tf.print(f(tf.constant(0.0)))\n",
    "tf.print(f(tf.constant(1.0)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 二、利用梯度磁带和优化器求最小值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "y = 0 ; x = 0.999998569\n"
     ]
    }
   ],
   "source": [
    "# 求 f(x) = a*x**2 + b*x + c的最小值\n",
    "# 使用 optimizer.apply_gradients\n",
    "\n",
    "x = tf.Variable(0.0, name='x', dtype=tf.float32)\n",
    "a = tf.constant(1.0)\n",
    "b = tf.constant(-2.0)\n",
    "c = tf.constant(1.0)\n",
    "\n",
    "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)\n",
    "for _ in range(1000):\n",
    "    with tf.GradientTape() as tape:\n",
    "        y = a*tf.pow(x, 2) + b*x + c\n",
    "    dy_dx = tape.gradient(y, x)\n",
    "    optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])\n",
    "\n",
    "tf.print(\"y =\", y, \"; x =\", x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "y = 0 ; x = 0.999998569\n"
     ]
    }
   ],
   "source": [
    "# 求f(x) = a*x**2 + b*x + c的最小值\n",
    "# 使用optimizer.minimize\n",
    "# optimizer.minimize相当于先用tape求gradient,再apply_gradient\n",
    "x = tf.Variable(0.0,name = \"x\",dtype = tf.float32)\n",
    "\n",
    "#注意f()无参数\n",
    "def f():   \n",
    "    a = tf.constant(1.0)\n",
    "    b = tf.constant(-2.0)\n",
    "    c = tf.constant(1.0)\n",
    "    y = a*tf.pow(x,2)+b*x+c\n",
    "    return(y)\n",
    "\n",
    "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)\n",
    "for _ in range(1000):\n",
    "    optimizer.minimize(f, [x])\n",
    "\n",
    "tf.print(\"y =\", f(),\"; x =\", x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "0.999998569\n"
     ]
    }
   ],
   "source": [
    "# 在autograph中完成最小值求解\n",
    "# 使用optimizer.apply_gradients\n",
    "x = tf.Variable(0.0, name = \"x\", dtype = tf.float32)\n",
    "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)\n",
    "\n",
    "@tf.function\n",
    "def minimizef():\n",
    "    a = tf.constant(1.0)\n",
    "    b = tf.constant(-2.0)\n",
    "    c = tf.constant(1.0)\n",
    "    \n",
    "    for _ in tf.range(1000): #注意autograph时使用tf.range(1000)而不是range(1000)\n",
    "        with tf.GradientTape() as tape:\n",
    "            y = a*tf.pow(x,2) + b*x + c\n",
    "        dy_dx = tape.gradient(y,x)\n",
    "        optimizer.apply_gradients(grads_and_vars=[(dy_dx,x)])\n",
    "    y = a*tf.pow(x,2) + b*x + c\n",
    "    return y\n",
    "\n",
    "tf.print(minimizef())\n",
    "tf.print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "0.999998569\n"
     ]
    }
   ],
   "source": [
    "# 在autograph中完成最小值求解\n",
    "# 使用optimizer.minimize\n",
    "\n",
    "x = tf.Variable(0.0,name = \"x\",dtype = tf.float32)\n",
    "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)   \n",
    "\n",
    "@tf.function\n",
    "def f():   \n",
    "    a = tf.constant(1.0)\n",
    "    b = tf.constant(-2.0)\n",
    "    c = tf.constant(1.0)\n",
    "    y = a*tf.pow(x,2)+b*x+c\n",
    "    return(y)\n",
    "\n",
    "@tf.function\n",
    "def train(epoch):  \n",
    "    for _ in tf.range(epoch):  \n",
    "        optimizer.minimize(f,[x])\n",
    "    return(f())\n",
    "\n",
    "\n",
    "tf.print(train(1000))\n",
    "tf.print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
